Importing Libraries and Datasets of Steam Top 100 Games
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
# Show every column when a DataFrame is displayed instead of eliding the middle ones.
# Call the public pd.set_option entry point; `pd.pandas` is not a documented attribute
# of the package and only resolves as a side effect of pandas' own internal imports.
pd.set_option('display.max_columns', None)
# NOTE(review): absolute, machine-specific location — the notebook only runs where this
# exact file exists. Consider a path relative to the notebook before sharing it.
DATA_PATH = 'C:/Users/SOUBHIK MANDAL/Desktop/jupyter notebook/Data Analysis of Steam Top 100 Games/steam_top_100.csv'
df = pd.read_csv(DATA_PATH)
df
| | Steam id | Game | Current players | Peak players today | Release date | Review summary | Total reviews | Tags |
|---|---|---|---|---|---|---|---|---|
| 0 | 730 | Counter-Strike: Global Offensive | 736875 | 775266 | 21 Aug 2012 | Very Positive | 5993058.0 | FPS:Shooter:Multiplayer:Competitive:Action:Tea... |
| 1 | 570 | Dota 2 | 580933 | 649690 | 9 Jul 2013 | Very Positive | 1599180.0 | Free to Play:MOBA:Multiplayer:Strategy:eSports... |
| 2 | 1063730 | New World | 243815 | 307146 | 28 Sep 2021 | Mostly Positive | 145931.0 | Massively Multiplayer:Open World:MMORPG:Advent... |
| 3 | 1172470 | Apex Legends | 121313 | 245235 | 4 Nov 2020 | Very Positive | 338236.0 | Free to Play:Battle Royale:Multiplayer:Shooter... |
| 4 | 440 | Team Fortress 2 | 104477 | 111884 | 10 Oct 2007 | Very Positive | 821568.0 | Free to Play:Hero Shooter:Multiplayer:FPS:Shoo... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 95 | 1286830 | STAR WARS™: The Old Republic™ | 7104 | 9345 | 21 Jul 2020 | Very Positive | 33098.0 | Free to Play:MMORPG:Character Customization:Mu... |
| 96 | 1644960 | NBA 2K22 | 7020 | 13139 | 9 Sep 2021 | Mixed | 6266.0 | Sports:Basketball:Simulation:eSports:Realistic... |
| 97 | 244210 | Assetto Corsa | 6965 | 8707 | 19 Dec 2014 | Very Positive | 42497.0 | Racing:Automobile Sim:Simulation:Driving:Reali... |
| 98 | 275850 | No Man's Sky | 6963 | 10122 | 12 Aug 2016 | Mostly Positive | 168792.0 | Open World:Open World Survival Craft:Space:Exp... |
| 99 | 1238810 | Battlefield™ V | 6724 | 33310 | 22 Oct 2020 | Mostly Positive | 68109.0 | FPS:World War II:Shooter:Multiplayer:War:Singl... |
100 rows × 8 columns
# Print each column's dtype and non-null count; columns with fewer non-null
# entries than rows contain missing values.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype
---  ------              --------------  -----
 0   Steam id            100 non-null    int64
 1   Game                100 non-null    object
 2   Current players     100 non-null    int64
 3   Peak players today  100 non-null    int64
 4   Release date        99 non-null     object
 5   Review summary      99 non-null     object
 6   Total reviews       99 non-null     float64
 7   Tags                98 non-null     object
dtypes: float64(1), int64(3), object(4)
memory usage: 6.4+ KB
Missing Values
# Collect the columns that contain at least one missing value.
features_with_mvals = [feature for feature in df.columns if df[feature].isnull().sum() > 0]
for feature in features_with_mvals:
    # isnull().mean() is a fraction in [0, 1]; scale it by 100 so the number
    # printed next to the '%' label really is a percentage (1 of 100 rows -> 1.0 %).
    missing_pct = df[feature].isnull().mean() * 100
    print(feature, np.round(missing_pct, 4), ' % missing values')
Release date 0.01 % missing values
Review summary 0.01 % missing values
Total reviews 0.01 % missing values
Tags 0.02 % missing values
# Impute the missing values column by column.
# Each result is assigned back to the frame instead of calling
# `df[col].fillna(..., inplace=True)`: that form mutates a temporary selection,
# emits a warning on pandas 2.x and silently does nothing under Copy-on-Write.

# Release date: carry the previous row's date forward (`.ffill()` replaces the
# deprecated `fillna(method='ffill')`). A missing value in the very first row
# has nothing to copy from and stays missing.
df['Release date'] = df['Release date'].ffill()
# Review summary / Tags: mark missing text with the literal placeholder 'NA'.
df['Review summary'] = df['Review summary'].fillna('NA')
# Total reviews: substitute the column mean of the non-missing values.
df['Total reviews'] = df['Total reviews'].fillna(df['Total reviews'].mean())
df['Tags'] = df['Tags'].fillna('NA')
DateTime Feature
# Parse the textual release dates once, then store both the parsed dates and
# the calendar year derived from them.
release_dates = pd.to_datetime(df['Release date'])
df['Release date'] = release_dates
df['Release year'] = release_dates.dt.year
df
| | Steam id | Game | Current players | Peak players today | Release date | Review summary | Total reviews | Tags | Release year |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 730 | Counter-Strike: Global Offensive | 736875 | 775266 | 2012-08-21 | Very Positive | 5993058.0 | FPS:Shooter:Multiplayer:Competitive:Action:Tea... | 2012 |
| 1 | 570 | Dota 2 | 580933 | 649690 | 2013-07-09 | Very Positive | 1599180.0 | Free to Play:MOBA:Multiplayer:Strategy:eSports... | 2013 |
| 2 | 1063730 | New World | 243815 | 307146 | 2021-09-28 | Mostly Positive | 145931.0 | Massively Multiplayer:Open World:MMORPG:Advent... | 2021 |
| 3 | 1172470 | Apex Legends | 121313 | 245235 | 2020-11-04 | Very Positive | 338236.0 | Free to Play:Battle Royale:Multiplayer:Shooter... | 2020 |
| 4 | 440 | Team Fortress 2 | 104477 | 111884 | 2007-10-10 | Very Positive | 821568.0 | Free to Play:Hero Shooter:Multiplayer:FPS:Shoo... | 2007 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 95 | 1286830 | STAR WARS™: The Old Republic™ | 7104 | 9345 | 2020-07-21 | Very Positive | 33098.0 | Free to Play:MMORPG:Character Customization:Mu... | 2020 |
| 96 | 1644960 | NBA 2K22 | 7020 | 13139 | 2021-09-09 | Mixed | 6266.0 | Sports:Basketball:Simulation:eSports:Realistic... | 2021 |
| 97 | 244210 | Assetto Corsa | 6965 | 8707 | 2014-12-19 | Very Positive | 42497.0 | Racing:Automobile Sim:Simulation:Driving:Reali... | 2014 |
| 98 | 275850 | No Man's Sky | 6963 | 10122 | 2016-08-12 | Mostly Positive | 168792.0 | Open World:Open World Survival Craft:Space:Exp... | 2016 |
| 99 | 1238810 | Battlefield™ V | 6724 | 33310 | 2020-10-22 | Mostly Positive | 68109.0 | FPS:World War II:Shooter:Multiplayer:War:Singl... | 2020 |
100 rows × 9 columns
Top 10 Games based on Current Players
# Bar chart of the ten games with the most current players; bar colour
# encodes the same value as bar height.
metric = 'Current players'
cp = df.sort_values(by=metric, ascending=False)
top_ten = cp.head(10)
fig = px.bar(top_ten, x='Game', y=metric, color=metric)
fig.show()
Top 10 Games based on Peak Players Today
# Bar chart of the ten games with the highest peak player count today; bar
# colour encodes the same value as bar height.
metric = 'Peak players today'
ppt = df.sort_values(by=metric, ascending=False)
top_ten = ppt.head(10)
fig = px.bar(top_ten, x='Game', y=metric, color=metric)
fig.show()
Top 10 Games based on Total Reviews
# Bar chart of the ten games with the most reviews; bar colour encodes the
# same value as bar height.
metric = 'Total reviews'
tr = df.sort_values(by=metric, ascending=False)
top_ten = tr.head(10)
fig = px.bar(top_ten, x='Game', y=metric, color=metric)
fig.show()
# List the distinct review-summary labels present in the data; these are the
# values the per-rating charts filter on.
df['Review summary'].unique()
array(['Very Positive', 'Mostly Positive', 'Mixed',
'Overwhelmingly Positive', 'NA'], dtype=object)
Games with Overwhelmingly Positive Review
# Pie chart of current-player share among games rated 'Overwhelmingly Positive'.
# The title promises a top 10, so rank the matching games by current players and
# cap the selection at ten explicitly; previously every matching row was plotted
# in whatever order the frame happened to be in.
overwhelmingly_positive = df.loc[df['Review summary'] == 'Overwhelmingly Positive']
top_overwhelmingly_positive = overwhelmingly_positive.nlargest(10, 'Current players')
fig = px.pie(top_overwhelmingly_positive, values='Current players', names='Game', title='Top 10 Games with Overwhelmingly Positive Review')
fig.show()
Games with Very Positive Review
# Pie chart of current-player share among games rated 'Very Positive'.
# The number of games shown and the number quoted in the title come from the
# same variable; previously 20 rows were plotted under a "Top 10" title.
top_n = 20
very_positive = df.loc[df['Review summary'] == 'Very Positive']
# Rank by current players explicitly rather than relying on the frame's row order.
top_very_positive = very_positive.nlargest(top_n, 'Current players')
fig = px.pie(top_very_positive, values='Current players', names='Game', title=f'Top {top_n} Games with Very Positive Review')
fig.show()